*	************************************************************************
* 	File-Name: 		.do
*	Date:  			04/26/2016
*	Authors: 		Eugenie Dugoua
*	Data Used:  		Data_Merged_11_20.dta
*	Purpose:   		Analysis
*	Software:		Stata SE 12.1
*	Machine:		Stata for Windows through Linux Wine
*	************************************************************************

*	************************************************************************
*	DIRECTORY                                                               
*	************************************************************************
* Make the project root the working directory; the relative paths used below
* (./Data, ./Manuscript) are resolved against it.
cd "/home/emd/Dropbox/Night Lights Validation (Eugenie, Ryan, Johannes)"
* Load the merged dataset, discarding whatever is currently in memory.
use "./Data/Python_code_merging/Data_Merged_11_20", clear

*JU FILE PATH, WD
*use "/home/jurpelai/Dropbox/Night Lights Validation (Eugenie, Ryan, Johannes)/Data/Python_code_merging/Data_Merged_8_13.dta"
*cd "/home/jurpelai/Dropbox/Night Lights Validation (Eugenie, Ryan, Johannes)"

*RK File Path, WD
*cd "C:/Users/Ryan/Dropbox/Night Lights Validation (Eugenie, Ryan, Johannes)"
*clear
*use "./Data/Python_code_merging/Data_Merged_8_13"


*	************************************************************************
*	CREATING VARIABLE                                          
*	************************************************************************

*JU NOTE: THIS IS NOT NEEDED AFTER HOUSEHOLD NUMBERS ARE CORRECTED
* replace electrified_hh_nbr = electrified_hh_nbr/100 
* python code was corrected and is reflected in Data_Merged_8_13.dta

* Fold the -1 code of RGGVY_elect_status into 0.
recode RGGVY_elect_status (-1 = 0)

* log(x + 1) transforms: a value of 0 maps to 0 instead of missing.
generate electrified_hh_nbr_log = ln(electrified_hh_nbr + 1)

generate log_dist = ln(c01_2001_dist_town + 1)

* Shapefile night-light sum: log(x + 1) plus its 2nd to 4th powers,
* written out as repeated products of the log term.
generate SH_sum_11_log = ln(SH_sum_11 + 1)
local shlog SH_sum_11_log
generate SH_sum_11_log2 = `shlog' * `shlog'
generate SH_sum_11_log3 = `shlog' * `shlog' * `shlog'
generate SH_sum_11_log4 = `shlog' * `shlog' * `shlog' * `shlog'

* India Lights mean: negative values are floored at 0 before taking log(x + 1).
replace API_mean_2011 = 0 if API_mean_2011 < 0
generate API_mean_2011_log = ln(API_mean_2011 + 1)
forvalues p = 2/4 {
    generate API_mean_2011_log`p' = API_mean_2011_log^`p'
}

* India Lights maximum: plain log with no +1 offset, so non-positive values
* become missing.
generate API_max_2011_log = ln(API_max_2011)
forvalues p = 2/4 {
    generate API_max_2011_log`p' = API_max_2011_log^`p'
}


*JU NOTE: THIS VARIABLE IS NOT USEFUL
*gen SH_sum_11_logsqrt = sqrt(SH_sum_11_log)

* For each logged light measure build two companion variables:
*   <measure>_nonzero : the log value where it is strictly positive, else missing
*   <measure>dummy    : 1 where the log value is exactly 0, 0 where it is
*                       positive, missing otherwise
foreach lightlog in SH_sum_11_log API_mean_2011_log {
    generate `lightlog'_nonzero = `lightlog' if `lightlog' > 0 & !missing(`lightlog')
    generate `lightlog'dummy = (`lightlog' == 0) if `lightlog' >= 0 & !missing(`lightlog')
}

* Log (plus one) of the number of non-electrified households, derived from the
* electrified percentage and the household total.
generate nonelectrified_hh_nbr_log = ln((100 - electrified_hh_per)/100 * c11_2011_total_hhs + 1)

* Labels for the 2nd to 4th power terms of the logged light measures.
* Built in a loop so all nine labels share one well-formed template (the
* hand-written SH_sum_11_log3 line had no space between the variable name and
* the opening quote of its label).
forvalues p = 2/4 {
    label variable SH_sum_11_log`p' "Night lights `p' (shape, log sum, 2011)"
}

forvalues p = 2/4 {
    label variable API_mean_2011_log`p' "India Lights `p' (Log Mean)"
}

forvalues p = 2/4 {
    label variable API_max_2011_log`p' "India Lights `p' (Log Maximum)"
}

*	*************************************
* 	LABELLING FOR TABLES
*	*************************************
* Variable labels used as row names in the exported LaTeX tables
* ("\%" keeps the percent sign escaped for LaTeX).

* Electrification outcomes
label variable electrified_hh_per "Electrified HH (\%)"
label variable electrified_hh_nbr_log "Electrified HH (log nbr)"
label variable nonelectrified_hh_nbr_log "Non-electrified HH (log nbr)"
label variable electrified_hh_nbr "Electrified HH (nbr)"
label variable SH_sum_11_log "Night lights (shape, log sum, 2011)"

* Village characteristics
label variable c11_2011_total_area "Area (ha)"
label variable c01_2001_dist_town "Distance town (km)"
label variable log_dist "Distance town (log km)"
label variable c11_2011_total_hhs "Nbr households"
label variable c11_2011_total_pop "Nbr people"
label variable c11_2011_total_sc_pop "SC population"
label variable c11_2011_total_st_pop "ST population"
label variable c11_2011_p_lit "Literate population"
label variable c11_2011_power_all "Village Electrified (=1)"
label variable c11_2011_asset_availing_bank "HH with banking (\%)"

* Zero / non-zero light companions.
* NOTE(review): the *_nonzero variables hold the log value where it is
* positive, yet their labels read "No ..." -- confirm the intended wording.
label variable SH_sum_11_log_nonzero "No night lights (shape, sum, 2011)"
label variable SH_sum_11_logdummy "Night lights (shape, sum) (=0)"
label variable API_mean_2011_log_nonzero "No India Lights (Mean, 2011)"
label variable API_mean_2011_logdummy "India Lights (Mean) (=0)"

* Raw night-light measures
label variable SH_mean_11 "Night lights (shape, mean, 2011)"
label variable SH_std_11 "Night lights (shape, sd, 2011)"
label variable SH_sum_11 "Night lights (shape, sum, 2011)"
label variable PTval_11 "Night lights (point value, 2011)"
label variable BIptval_11 "Night lights (bilinear, 2011)"
label variable _2K_mean_11 "Night lights (2km, mean, 2011)"
label variable _2K_std_11 "Night lights (2km, sd, 2011)"
label variable _2K_sum_11 "Night lights (2km, sum, 2011)"
label variable _3K_mean_11 "Night lights (3km, mean, 2011)"
label variable _3K_std_11 "Night lights (3km, sd, 2011)"
label variable _3K_sum_11 "Night lights (3km, sum, 2011)"
label variable _5K_mean_11 "Night lights (5km, mean, 2011)"
label variable _5K_std_11 "Night lights (5km, sd, 2011)"
label variable _5K_sum_11 "Night lights (5km, sum, 2011)"
label variable API_mean_2011 "Night lights (API, mean, 2011)"
* Fixed typo: previously "Night Light s(API, max, 2011)".
label variable API_max_2011 "Night lights (API, max, 2011)"
label variable API_mean_ofmonthly_sd_2011 "Night lights (API, sd, 2011)"

* Programme participation and household assets
label variable RGGVY_dummy "RGGVY in Village"
label variable c11_2011_asset_radio_transitor "HH with radio (\%)"
label variable c11_2011_asset_tv "HH with TV (\%)"
label variable c11_2011_asset_landline_phone "HH with landline phone (\%)"
label variable c11_2011_asset_none "HH without assets (\%)"

*	************************************************************************
*	SUMMARY STATISTICS
*	************************************************************************

*** OTHER TABLE

* Two descriptive tables exported to LaTeX with the same layout: one row per
* variable, with columns N, mean, sd, min and max (two decimals).
* NOTE(review): eststo/estpost/esttab are presumably provided by the
* user-written estout package -- confirm it is installed before running.
local statcells "count mean(fmt(2)) sd(fmt(2)) min(fmt(2)) max(fmt(2))"

* Table 1: outcomes and village characteristics.
local villagevars ///
    electrified_hh_per electrified_hh_nbr electrified_hh_nbr_log ///
    c11_2011_total_area c01_2001_dist_town ///
    c11_2011_total_hhs c11_2011_total_pop c11_2011_total_sc_pop ///
    c11_2011_total_st_pop c11_2011_p_lit c11_2011_power_all ///
    c11_2011_asset_availing_bank c11_2011_asset_none ///
    c11_2011_asset_radio_transitor c11_2011_asset_tv

eststo clear
quietly estpost summarize `villagevars'
esttab using "./Manuscript/Tables/summarystats.tex", ///
    replace nonum cells("`statcells'") ///
    noobs label booktabs

*** LIGHTS TABLE

* Table 2: the night-light measures.
local lightvars ///
    SH_mean_11 SH_std_11 SH_sum_11 PTval_11 BIptval_11 ///
    _2K_mean_11 _2K_std_11 _2K_sum_11 ///
    _3K_mean_11 _3K_std_11 _3K_sum_11 ///
    _5K_mean_11 _5K_std_11 _5K_sum_11 ///
    API_mean_2011 API_mean_ofmonthly_sd_2011

eststo clear
quietly estpost summarize `lightvars'
esttab using "./Manuscript/Tables/summarystats_lights.tex", ///
    replace nonum cells("`statcells'") ///
    noobs label booktabs


*** COVERAGE OF NIGHT LIGHT VARIABLES BY STATE
label variable St11 "State"

* Value labels for the numeric state codes stored in St11. "\&" keeps the
* ampersand escaped for the LaTeX table. The replace option allows this section
* to be re-run without a "label already defined" error.
* Fixed spelling: "Maharastra" -> "Maharashtra".
label define statenames ///
    1 "Jammu \& Kashmir" 2 "Himachal Pradesh" 3 "Punjab" 4 "Chandigarh" ///
    5 "Uttarakhand" 6 "Haryana" 7 "Delhi" 8 "Rajasthan" 9 "Uttar Pradesh" ///
    10 "Bihar" 11 "Sikkim" 12 "Arunachal Pradesh" 13 "Nagaland" 14 "Manipur" ///
    15 "Mizoram" 16 "Tripura" 17 "Meghalaya" 18 "Assam" 19 "West Bengal" ///
    20 "Jharkhand" 21 "Orissa" 22 "Chhattisgarh" 23 "Madhya Pradesh" ///
    24 "Gujarat" 25 "Daman \& Diu" 26 "Dadra \& Nagar Haveli" ///
    27 "Maharashtra" 28 "Andhra Pradesh" 29 "Karnataka" 30 "Goa" ///
    31 "Lakshadweep" 32 "Kerala" 33 "Tamil Nadu" 34 "Puducherry" ///
    35 "Andaman \& Nicobar Islands", replace
label values St11 statenames

* Count of non-missing observations of each light measure per state,
* restricted to villages with at least one household.
eststo clear
quietly estpost tabstat SH_sum_11 API_mean_2011 BIptval_11 St11 if c11_2011_total_hhs > 0, by(St11) statistics(count)
esttab using "./Manuscript/Tables/summarystats_ltscover(bystate).tex", ///
    replace cells("SH_sum_11 API_mean_2011 BIptval_11 St11") varwidth(30) ///
    noobs nonum booktabs

***** For Boxplots
* Bin the three logged light measures into categories for the box plots.
* For each measure: inspect it (codebook, 95th and 99th centiles), then cut it
* at the hand-picked break points below.
* [Note: need to add maximum value for this to work correctly.]
* NOTE(review): presumably values outside the listed breaks end up missing in
* the *_cat variables -- confirm against the egen cut() documentation.
local at_SH_sum_11     "0, 2.3, 3.5, 4.4, 4.9, 5.8"
local at_API_mean_2011 "0, 0.1, 0.7, 1.3, 2.0, 2.4, 3.2"
local at_API_max_2011  "2.0, 2.2, 2.7, 3.3, 3.9, 4.1"

foreach stub in SH_sum_11 API_mean_2011 API_max_2011 {
    codebook `stub'_log
    foreach pct in 95 99 {
        centile `stub'_log, centile(`pct')
    }
    egen `stub'_cat = cut(`stub'_log), at(`at_`stub'') label
}

* Boxplots with API data versus shapefile data
* Box plots of log electrified households across the binned light measures.
* Each graph is kept in memory under a name so it can be combined below.
* name(..., replace) overwrites a graph of the same name left over from an
* earlier run; without it a re-run in the same session stops with an
* "already exists" error.
graph box electrified_hh_nbr_log, over(SH_sum_11_cat) ///
    ytitle("log(Number Electrified Households)") ///
    title("Sum of DN with Shapefile") scheme(s1mono) name(hhsh, replace)
graph box electrified_hh_nbr_log, over(API_mean_2011_cat) ///
    ytitle("log(Number Electrified Households)") ///
    title("Mean of DN with India Lights") scheme(s1mono) name(hhio, replace)
graph box electrified_hh_nbr_log, over(API_max_2011_cat) ///
    ytitle("log(Number Electrified Households)") ///
    title("Maximum of DN with India Lights") scheme(s1mono) name(hhiom, replace)

* Put the three panels in one figure.
graph combine hhsh hhio hhiom, scheme(s1mono) xsize(7) ysize(4)


* Save the dataset with all generated variables and labels under a new name,
* leaving the merged input file untouched.
save "./Data/Python_code_merging/Data_Merged_11_20_stata", replace

